package com.atguigu.flink.chapter07.state;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.producer.ProducerRecord;

import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

/**
 * Kafka -&gt; Flink -&gt; Kafka word-count job with checkpointing and a transactional Kafka sink.
 *
 * <p>The job reads lines from topic {@code s1}, splits each line on single spaces, keeps a
 * running count per word, and writes {@code word_count} strings to topic {@code s2} through a
 * {@link FlinkKafkaProducer} configured with {@code Semantic.EXACTLY_ONCE}. A second sink throws
 * on purpose whenever a word contains {@code "x"}, so a job failure can be triggered from the
 * input data.
 *
 * @Author lizhenchao@atguigu.cn
 * @Date 2021/12/15 9:26
 */
public class Flink11_Kafka_Flink_Kafka {
    /** Kafka broker list shared by the source and the sink. */
    private static final String BOOTSTRAP_SERVERS = "hadoop162:9092";

    /**
     * Builds the pipeline and submits it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): presumably the HDFS user for the checkpoint directory configured below.
        System.setProperty("HADOOP_USER_NAME", "atguigu");

        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 20000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.setParallelism(1);

        configureCheckpointing(env);

        SingleOutputStreamOperator<Tuple2<String, Long>> resultStream = env
            .addSource(new FlinkKafkaConsumer<String>("s1", new SimpleStringSchema(), sourceProperties()))
            .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
                    for (String word : value.split(" ")) {
                        // Empty lines and leading/consecutive spaces produce empty tokens;
                        // they are not words and must not be counted.
                        if (word.isEmpty()) {
                            continue;
                        }
                        out.collect(Tuple2.of(word, 1L));
                    }
                }
            })
            .keyBy(t -> t.f0)
            .sum(1);

        // Transactional sink: each running count is written to topic "s2" as "word_count".
        resultStream
            .addSink(new FlinkKafkaProducer<Tuple2<String, Long>>(
                // Default topic required by the constructor; every record built below
                // names its own target topic explicitly.
                "default",
                new KafkaSerializationSchema<Tuple2<String, Long>>() {
                    @Override
                    public ProducerRecord<byte[], byte[]> serialize(Tuple2<String, Long> element,
                                                                    @Nullable Long timestamp) {
                        String payload = element.f0 + "_" + element.f1;
                        return new ProducerRecord<>("s2", payload.getBytes(StandardCharsets.UTF_8));
                    }
                },
                sinkProperties(),
                FlinkKafkaProducer.Semantic.EXACTLY_ONCE
            ));

        // Failure injection: any word containing "x" makes this sink throw on purpose.
        resultStream
            .addSink(new SinkFunction<Tuple2<String, Long>>() {
                @Override
                public void invoke(Tuple2<String, Long> value, Context context) throws Exception {
                    if (value.f0.contains("x")) {
                        throw new RuntimeException("故意抛个异常");
                    }
                }
            });

        env.execute();
    }

    /** Enables 5-second exactly-once checkpoints stored on HDFS and retained on cancellation. */
    private static void configureCheckpointing(StreamExecutionEnvironment env) {
        env.enableCheckpointing(5000);
        env.setStateBackend(new HashMapStateBackend());

        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        checkpointConfig.setCheckpointStorage("hdfs://hadoop162:8020/ck10");

        // Set the consistency semantics.
        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);

        // Leave at least 500 ms between two checkpoints.
        checkpointConfig.setMinPauseBetweenCheckpoints(500);

        // A checkpoint must complete within one minute, otherwise it is discarded.
        checkpointConfig.setCheckpointTimeout(60000);

        // Only one checkpoint may be in progress at any time.
        checkpointConfig.setMaxConcurrentCheckpoints(1);

        // Keep externalized checkpoints after the job has been cancelled.
        checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    }

    /** Builds the Kafka consumer configuration for the source topic. */
    private static Properties sourceProperties() {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", BOOTSTRAP_SERVERS);
        props.setProperty("group.id", "Flink11_Kafka_Flink_Kafka1");
        props.setProperty("auto.offset.reset", "latest");
        // Do not read records belonging to transactions that have not been committed.
        props.setProperty("isolation.level", "read_committed");
        return props;
    }

    /** Builds the Kafka producer configuration for the transactional sink. */
    private static Properties sinkProperties() {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", BOOTSTRAP_SERVERS);
        // 15 minutes. Stored as a String via setProperty: Properties is specified to hold
        // String values only, and non-String entries are invisible to getProperty().
        // NOTE(review): the value presumably has to stay within the broker's
        // transaction.max.timeout.ms -- confirm against the cluster configuration.
        props.setProperty("transaction.timeout.ms", String.valueOf(15 * 60 * 1000));
        return props;
    }

}
